{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "dir=r\"D:\\ZSL_ImageGame\\DatasetA_train_20180813\\\\\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "np.set_printoptions(threshold=np.inf)\n",
    "import pandas as pd\n",
    "pd.set_option('max_colwidth',200)\n",
    "import matplotlib.pyplot as plt\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# train.txt分析"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_data=pd.read_csv(dir+\"train.txt\",sep='\\t',header=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>38221</td>\n",
       "      <td>38221</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>38221</td>\n",
       "      <td>190</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>66713c5c295fe7a4cacb21ab79c6eff0.jpeg</td>\n",
       "      <td>ZJL116</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>1</td>\n",
       "      <td>227</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                            0       1\n",
       "count                                   38221   38221\n",
       "unique                                  38221     190\n",
       "top     66713c5c295fe7a4cacb21ab79c6eff0.jpeg  ZJL116\n",
       "freq                                        1     227"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_data.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ZJL1</th>\n",
       "      <td>203</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ZJL10</th>\n",
       "      <td>171</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ZJL100</th>\n",
       "      <td>199</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ZJL101</th>\n",
       "      <td>195</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ZJL102</th>\n",
       "      <td>198</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0\n",
       "1          \n",
       "ZJL1    203\n",
       "ZJL10   171\n",
       "ZJL100  199\n",
       "ZJL101  195\n",
       "ZJL102  198"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_data.groupby(1).count().iloc[:5,:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD9CAYAAACyYrxEAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3Xl8FeW9x/HPLwsJWUgIJAIhQCAgKihoRAUVUVut9qqt1WJ7taiVqtirvV21i1avva3eeq/UrbTainVf6lK11gXXshgUEMQl7BGEgBYjSMjy3D/OjJmcTHISSM5JTr7v1yuvM3nmmZnfLOd3nvPMnBlzziEiIskrJdEBiIhI11KiFxFJckr0IiJJToleRCTJKdGLiCQ5JXoRkSQXM9GbWYmZzTOzlWa2wswu9cqvMbNlZrbEzP5hZkO8cjOz2WZW6Y0/uKtXQkREWmexrqM3s8HAYOfcG2aWCywGTgOqnHOfeHX+A9jfOXehmZ0EfBc4CTgMuNE5d1hXroSIiLQuZoveObfJOfeGN1wDrASK/STvyQb8T4xTgbkuYgGQ731YiIhIAqR1pLKZjQAmAgu9/68FzgG2A9O8asXAhsBkVV7Zpr0LVURE9kS7E72Z5QAPA5f5rXnn3E+Bn5rZ5cAlwJWAhUzeon/IzGYCMwGys7MPGTt2bMejFxHpxRYvXrzVOVcYq167Er2ZpRNJ8nc75x4JqXIP8CSRRF8FlATGDQU2Rk/gnJsDzAEoLy93FRUV7QlFREQ8ZrauPfXac9WNAbcDK51zNwTKRweqnQK84w0/DpzjXX1zOLDdOaduGxGRBGlPi34KcDbwlpkt8cquAM43s32BRmAdcKE37ikiV9xUAjuBczs1YhER6ZCYid459yrh/e5PtVLfAbP2Mi4REekk+mWsiEiSU6IXEUlySvQiIklOiV5EJMn16ES/ZusOfvnECuoaGhMdiohIt9XDE/2n/Om1tTz65geJDkVEpNvq0Yl+2r5F7D+4H7e+uIqGxrbvwiki0lv16ERvZsyaVsbqrTu4Z9F61m7dQazbLouI9DY9OtEDnDhuEKMKs/n5o8s55n9e5C8L2nXrBxGRXqPHJ/rUFONPMyYx+6yJHDQ0j1teXMXuep2cFRHx9fhEDzBsQBanHDSE731hDJu27+Kvb1YlOiQRkW4jKRK9b+qYQsYX5/FfT67k5Nmv8NgSXY0jIpJUid7MuOqU/Tl85AA2f7KLu+arv15EJKkSPcAhwwv4wznlnH7wUJZVbWdXXUOiQxIRSaikS/S+Q0cUsLuhkSUb/pXoUEREEiqpE70ZvL7mo0SHIiKSUEmb6POy0tl3n1wWrVWiF5HeLWkTPcCk0gLeWPcx9brpmYj0Ykmf6HfsbuD1tR8nOhQRkYRJ6kR/7NgiBmT34fcvr0p0KCIiCZPUiT6rTxrnHVnKi+9Ws/yD7YkOR0QkIdISHUBXO/uI4dz20iq+c9diivv3BWBQv0x+e+ZBpKcm9eeciAiQ5C16gH6Z6Vz5bwdQUtCXFIO6hkYeX7pRDysRkV7DusP928vLy11FRUVcluWc4+TZr7KrroFn/3MqqSkWl+WKiHQ2M1vsnCuPVS/pW/TRgg8r+fXTK3locZWeOSsiSS1mojezEjObZ2YrzWyFmV3qlV9vZu+Y2TIz+6uZ5QemudzMKs3sXTM7oStXYE+cOG4QYwfl8odX1vCDB5fqYSUiktTa06KvB77vnNsPOByYZWb7A88C45xzBwLvAZcDeOOmAwcAJwK3mFlqVwS/p1JTjMcumcKiK47j0BH9mfPyaj2sRESSVsxE75zb5Jx7wxuuAVYCxc65fzjn6r1qC4Ch3vCpwH3OuVrn3BqgEpjU+aHvnYy0VIr6ZXLJsaPZtH0X97++nh219bEnFBHpYTrUR29mI4CJwMKoUecBT3vDxcCGwLgqr6xbOnr0QMYX5/Hzx1ZwwJXP
qBtHRJJOuxO9meUADwOXOec+CZT/lEj3zt1+UcjkLS7tMbOZZlZhZhXV1dUdi7oTmRk3Tp/Az07ej+L8vjy/cnPCYhER6Qrt+sGUmaUTSfJ3O+ceCZR/C/gycJxruk6zCigJTD4U2Bg9T+fcHGAORC6v3KPoO8nIwhxGFuawqnoHf1u6kYZGp8suRSRptOeqGwNuB1Y6524IlJ8I/Bg4xTm3MzDJ48B0M8sws1JgNLCoc8PuGoeVFlBTW887H34Su7KISA/Rnhb9FOBs4C0zW+KVXQHMBjKAZyOfBSxwzl3onFthZg8AbxPp0pnlnOsRz/M7tLQAgEVrPuKAIXkJjkZEpHPETPTOuVcJ73d/qo1prgWu3Yu4EqI4vy/F+X15fe1HnDulNNHhiIh0il73y9hYJpUWsHD1Rzy25APWb9sZewIRkW5OiT7K0WMGsm3Hbi69bwnn/nkRjY2JvxeQiMjeUKKPctqEYl750TSuOW0cq6p38PcVHyY6JBGRvaJEH8XMKCnI4huThlE6MJubXqiksdHhXORPRKSnSfoHj+yp1BTjoqmj+NHDyxh5RdN55x+fOJaLjhmVwMhERDpGib4NXz24mJraemp21QHw0nvV3PpiJf9++DByM9MTHJ2ISPso0bchLTWF849suszyuLH78G83vcpdC9Zx8TFlCYxMRKT9lOg7YPzQPKaOKeT3L61m5aYaANJTjMuOH8OwAVkJjk5EJJxOxnbQD764L4P6ZbLig+2s+GA7f1u2if/5x7uJDktEpFVq0XfQ+KF5PPO9oz///7+fXskfXl7Nf35hDCMGZicwMhGRcGrR76VvHzmS9NQU/vvplTz39maee3sz81dt06WYItJtqEW/lwpzM/jGYcP402treWZF073sH77oCA4ZXpDAyEREIpToO8HlX9qP0w8einPwWV0DZ/5+PvNXbVOiF5FuQYm+E/RJS2FccdNtjffdJ5eFaz7ikgTGJCLiUx99F5hUWsAb6z6mvqEx0aGIiCjRd4VDSwvYsbuBtzfpSVUiknhK9F1g0oimJ1WJiCSa+ui7wKC8TIYPyOKehetZVf0pAIPz+vLdY8vwHrsoIhI3SvRd5MzyEu7851qeX7mF+kbHRzt2M744j2ljixIdmoj0MtYdfthTXl7uKioqEh1Gl6lraOSY619kUF4mD114hFr1ItIpzGyxc648Vj216OMgPTWF70wdyS8eW8F9r29gxICmWyWkGBxUkk9memoCIxSRZKZEHydnlpdw0wuVXP7IWy3GnTellF/82/4JiEpEegMl+jjJTE/l0VlTWLdtZ7PyufPXcs+idVw8bRQDczISE5yIJDUl+jgakt+XIfl9m5UV9cvg7ys+5I5X1/CjE8cmKDIRSWYxE72ZlQBzgUFAIzDHOXejmZ0BXAXsB0xyzlUEprkcOB9oAP7DOfdMF8SeFEYV5nDSuMHMeXk1Dy2uarVeXt907rngcApz1eoXkY5pT4u+Hvi+c+4NM8sFFpvZs8By4KvA74OVzWx/YDpwADAEeM7MxjjnGjo39OTxky+NpX92Og2N4VdANTbCg4s38MdXV3P5l/aLc3Qi0tPFTPTOuU3AJm+4xsxWAsXOuWeBsEsFTwXuc87VAmvMrBKYBMzvzMCTSUlBFv912vg263xW18Bf5q/joqmjyM/qE6fIRCQZdKiP3sxGABOBhW1UKwYWBP6v8spkL8yaVsbjSzdy6X1LGF2UA0BpYTbfPGx4giMTke6u3YnezHKAh4HLnHNt3a0r7NdALfokzGwmMBNg2LBh7Q2j19p3UC5fLy/hb8s2UrH2I+obHbX1jYwvzuPAofmJDk9EurF23dTMzNKJJPm7nXOPxKheBZQE/h8KbIyu5Jyb45wrd86VFxYWtjfeXu03XzuQFVefyIqrT6TiZ8fTLzONm+dVJjosEenm2nPVjQG3Ayudcze0Y56PA/eY2Q1ETsaOBhbtVZTSQm5mOjMmj2D2C5U8v3IzRbmZ
7Z7WDMYOyiUtVTcvFekN2tN1MwU4G3jLzJZ4ZVcAGcDvgELgSTNb4pw7wTm3wsweAN4mcsXOLF1x0zXOnVLK7a+u4fw7O36foEumlfGDE/btgqhEpLvRTc16uPc317T4tW0sf1m4joq1H/Paj48lLyu9iyITka6mm5r1EqP3yWX0PrkdmmZIfl9Omv0Kd85fy38cN7prAhORbkOJvhfaf0g/jhtbxM3zKnn0zQ9i1j/5wMF8/4vq5hHpqZToe6nLTxrLzfNWUd/Kr3F967bt4JYXV3FmeQklBVlxik5EOpMSfS9VVpTL/359Qsx6H27fxdHXzeO2l1Zx7Vfa/vWuiHRPSvTSpkF5mZx+yFAerKhiQHYfzAwz+PKBQyjzfqErIt2bEr3EdPExo3hy2UZmv9D046yX36vm4Ysm67GIIj2AEr3EVFKQxbKrTvj8/7vmr+Xnj61g/uptTB41MHGBiUi7KNFLh51RXsLsFyqZ/fz7DOoX/ovcof2z6JOmX96KdAdK9NJhmempXHBUKb966h2O/e1LoXVOmzCE/5s+Mc6RiUgYJXrZI+dOKWVo/yzqGhpbjJv3zpbILZWPH0PpwOwERCciQUr0skfSU1M4afzg0HGTRw3k6eUfcuuLlVz3tYPiHJmIRFOil05XmJvBWZOGcdeCdazcVNOhaftn9+GWbx5MToYOTZHOoneTdImLp41iS80udtW17NppTX2j4+X3qvnLgnVcOHVUF0Yn0rso0UuXKMrN5JZvHtLh6c6+fSF/fGU1MyaPIDM9tQsiE+l9lOilW5k1rYzpcxbw078uZ+ygjt2VM1pKinHahCEMyMnopOhEeiYleulWDist4MiygTz8RlWnzO+dTZ9w/Rk6ISy9mxK9dCtmxtzzJrGzbu8fSnbd39/hnoXrufT40QztrztvSu+lRC/dTkqKdcpVNxdOHcW9i9Zz87xKvnf8mBbjzYyBOX10vx5Jekr0krSG5PflqxOHcu+iDdy7aENonR98cQyXHKunbElyU6KXpHb5SWOZOCyfhpBnIz+2ZCN/eGUNM6aU6rp9SWo6uiWp5Wf1YfqkYaHjxg3J49SbX9N1+5L0lOil1zqoJJ+jRg/k5hcqefHdLa3W65eZzm/PPIjczPQ4RifSeXQfWenVfnziWMYPzaPREfrX0Oj4x9ubmTt/XaJDFdljatFLrzauOI97Lji8zToz/rSI219dw7lTRpDVR28Z6Xl01IrEcMm0Mr5223yu+dvbHDg0v9m4qWMKGZLfN0GRibRPzERvZiXAXGAQ0AjMcc7daGYFwP3ACGAtcKZz7mOLXJR8I3ASsBOY4Zx7o2vCF+l65SMKOGr0wNDLNA8rLeD+7xyRoMhE2qc9Lfp64PvOuTfMLBdYbGbPAjOA551zvzaznwA/AX4MfAkY7f0dBtzqvYr0WHfMOJRtn+5uVvbwG1Vc/8y7VKz9iPIRBQmKTCS2mCdjnXOb/Ba5c64GWAkUA6cCd3rV7gRO84ZPBea6iAVAvpmFP6FCpIdIT01hUF5ms79zp4ygILsPv3uhkh219a3+7eqE2zmI7I0O9dGb2QhgIrAQ2Mc5twkiHwZmVuRVKwaC32+rvLJNexusSHeS1SeN848s5fpn3uWAK59ptV5qivHAdw7nkOFq9UtitDvRm1kO8DBwmXPukzbuDxI2osXPEs1sJjATYNiw8B+0iHR35x9ZSnafVHaHPDvXN+fl1fzfc+9z1/nqwZTEaFeiN7N0Ikn+bufcI17xZjMb7LXmBwP+L06qgJLA5EOBjdHzdM7NAeYAlJeXt/x9ukgPkJmeyowppW3WaXTw66ffYemGf3FQSX6bdUW6QnuuujHgdmClc+6GwKjHgW8Bv/ZeHwuUX2Jm9xE5Cbvd7+IR6Y3+/fDh3PriKi69703KinJajO+TlsJPT96fYl2mKV2kPS36KcDZwFtmtsQru4JIgn/AzM4H1gNneOOeInJpZSWRyyvP7dSIRXqYnIw0rjhp
LHPnr2PT9l0txr+3uYbcjHR+87UDExCd9AbmQu7qF2/l5eWuoqIi0WGIJMQvHlvOvYvW89IPp+nHV9IhZrbYOVceq55+GSuSYN+ZOop7Fq7n2idXcsK4QR2adv/BuZQV7d2zdSX5KdGLJFhxfl/OKI88IOXJtzp2OqswN4NXfjSNzPTULopOkoESvUg3cPWp4/j2USPpSE/qyk2f8N173+TBig2cfcSILotNej4lepFuID01hVGFLa/IacuowmzueG0Nt720mumThpGeqruOSzglepEeysy4ZFoZ599ZweifPh2zfmFuBn+/9CgG5GTEITrpTpToRXqwY8cWcc2pB1AddcO1aHUNjdz20irueG0NPzxhbJyik+5CiV6kBzOzdvfPr9u2g7n/XMfMo0eR11ePRexNlOhFeomLjynjqbc+5Nt3vt7sen0DZkwpZYJuz5C0lOhFeolxxXmcffhwXnm/muqa2s/Lt9TUsmbrDh6dNYU2blYoPZgSvUgvcs1p41qU3bNwPVf89S1erdzKUaMLExCVdDUlepFe7vRDipn9/Pvc8Ox71LVxu+W2TCzpT//sPp0cmXQWJXqRXi4jLZULp47kqife5rw/79k9p44eU8jc8yZ1cmTSWZToRYRzjhjBoaUF1Dd0/CaHTy//kNteWqX77XdjSvQiQkqKccCQvD2adlRRDvcuWs9N8yr5wzkxb6QoCaDfTIvIXsnJSGPG5BE8+/Zmxl35DN++83W6w+3PpYla9CKy1y44eiT1jY1UbvmUZ1ZsZv6qbUwuG5josMSjFr2I7LWcjDR+eMJYbpw+kaLcDG6aV5nokCRALXoR6TSZ6alccNRIrn1qJf/5wJJm98nPSEvh0uNGk5+lyzDjTYleRDrVNw4bxuNLN/Lye1ublW/bUUtGWio/+ZJuqhZvSvQi0qmyM9J44rtHtii/5J43+MuCdVw0dRR5WbqpWjypj15E4mLWtDI+ra3nhmffZeHqbSxcvY13P6xJdFi9glr0IhIX+w3uxxf234c756/jzvnrADCDx2cdyfihe3YNv7SPEr2IxM0NZx7EW1XbAWhwjovvfoOb51Vy29mHJDiy5KZELyJxk5uZ3uz6+hmTR/C7Fyp5f3MNo/fJTWBkyS1mH72Z3WFmW8xseaDsIDObb2ZvmdkTZtYvMO5yM6s0s3fN7ISuClxEer5zp5TSNz2Vr976Tw7/1fMc/qvnOfv2hTQ06pe1nak9J2P/DJwYVfZH4CfOufHAX4EfApjZ/sB04ABvmlvMLBURkRAF2X24/owDOWncYKaOKeSgkjxeeX8rT721KdGhJZWYXTfOuZfNbERU8b7Ay97ws8AzwM+BU4H7nHO1wBozqwQmAfM7K2ARSS5fPnAIXz5wCACNjY4v/O9L3Dyvki8fOFhPvOoke9pHvxw4BXgMOAMo8cqLgQWBelVemYhITCkpxsXHlPH9B5fyw4eWURB4mElqinHu5BEU9ctMYIQ9054m+vOA2Wb2C+BxYLdXHvbxG9rZZmYzgZkAw4YN28MwRCTZnDJhCHMXrOPJZc27bz6ra2Dbp7Vc97WDEhRZz7VHid459w7wRQAzGwOc7I2qoql1DzAU2NjKPOYAcwDKy8t15kVEAEhPTeGxWVNalF/52HLuXrieS48fQ3F+3wRE1nPtUaI3syLn3BYzSwF+BtzmjXocuMfMbgCGAKOBRZ0SqYj0ajOnjuLuhev532ffY8bkETHrjyzMJquPriCHdiR6M7sXOAYYaGZVwJVAjpnN8qo8AvwJwDm3wsweAN4G6oFZzrmGrghcRHqX4vy+nH7wUO6v2MBDi6ti1j92bBF3zDg0DpF1f9YdngRTXl7uKir27KHEItJ7fFpbz4JV22iMkbdeeX8rdy1YxxOXJPftFcxssXMu5vMb9b1GRHqMnIw0jt9/n5j1jhg1gMeWfKDbK3iU6EUk6eRmpjNj8ghmv1DJcb99sdm4AdkZ3D6jnNzM3nOrZCV6EUlK
5x81kk3bd7Gzruk0YUOD4+8rPuSuBeu4+JiyBEYXX0r0IpKU8vqmc/0ZLa+5/9Ydi7j9lTWcO7mUvn16xx1alOhFpFe55NgyzrhtPj96eBllhTkAjN4nh5PGD05wZF1HiV5EepVDRxRw7Nginlja9FvOFIPnv38MpQOzExhZ11GiF5Fe5/ZvleNfobn101qOum4et75YmbS3V1CiF5Fex8zwb4xZ1C+T6YeWcPfC9Zw1aRj9s5pupJaWahTn9+3xd9FUoheRXm/m1FHcs2g9X7nlny3G/eb08Xz90J5940UlehHp9Yrz+3LfzCNY/9GOZuV/fGUNv3uhkq8ePJT01PY8p6l7UqIXEQEOGd6fQ4b3b1aWm5HOt+dW8PiSjZx+yNAERbb3lOhFRFpx3H5FjB2Uy9V/e5u589cCkNUnjdlnTaQwNyOhsXVEz/0uIiLSxcyMq045gEOG96d/dh/ys/qwcM025ry8KtGhdYha9CIibTh85AAOHzng8/8vu+9N7l64nouOKWv2qMPuTC16EZEOuHhaGTt3N/CzR9/iT6+t4eMdu2NPlGBK9CIiHTBmn1xOnTCEp976kF8+8TbXPfNOokOKSYleRKSD/u/rE1h21Rf5xmHDeGhxFZu2f5bokNqkRC8i0kFmRr/MdC6aOopGBzfPq6S6ppbGxsQ/sS+MEr2IyB4qKcjitAnF/GXBeg699jl+8NDSRIcUSoleRGQv/Ozk/fjVV8Zz0vhBPPrmB6zZuiP2RHGmRC8ishf6Z/fhG4cN45enjCM9NYVbX6xMdEgt6Dp6EZFOUJib8fldMKs+/oyDh/XnByfsm+iwALXoRUQ6zcXTyphcNpDqmlpumlfJ8g+2JzokQIleRKTT7NMvk7nnTeKhiyaTm5HGLd2kG0eJXkSkk+X1TeecycN5evmHVG6pSXQ4sRO9md1hZlvMbHmgbIKZLTCzJWZWYWaTvHIzs9lmVmlmy8zs4K4MXkSkuzpvSilpKcaDi6sSHUq7WvR/Bk6MKrsO+KVzbgLwC+9/gC8Bo72/mcCtnROmiEjPMiAngwOH5vP6mo8SHUrsRO+cexmIjtQB/bzhPMB/nPqpwFwXsQDIN7PBnRWsiEhPcuiIApZVbeez3Q0JjWNP++gvA643sw3A/wCXe+XFwIZAvSqvTESk1zmstID6RsebGz5OaBx7mugvAr7nnCsBvgfc7pWHPSo99OYPZjbT69+vqK6u3sMwRES6r4OH98cMFiW4+2ZPE/23gEe84QeBSd5wFVASqDeUpm6dZpxzc5xz5c658sLCwj0MQ0Sk+8rrm87YQf14fW3PTPQbgane8LHA+97w48A53tU3hwPbnXOb9jJGEZEe67DSAl5f+zEXzK3gyWWJSYcxb4FgZvcCxwADzawKuBK4ALjRzNKAXUSusAF4CjgJqAR2Aud2QcwiIj3GaROLeWP9xyxYtY3qmlpOPjD+16fETPTOubNaGXVISF0HzNrboEREksWEknwev+RIfv7och598wOcc5iFnc7sOvplrIhIHJQV5VBTW8+Wmtq4L1uJXkQkDsqKcgCo3PJp3JetRC8iEgdK9CIiSa4oN4PcjDQlehGRZGVmjCrKUaIXEUlmZUU5VFYr0YuIJK2yohyqa2rZ/lldXJerRC8iEidlhYk5IatELyISJyMGZgFQ9fHOuC5XiV5EJE4KczMBqI7zj6aU6EVE4qRfZhoZaSlx/3WsEr2ISJyYGYW5GWrRi4gks6LcDLbU7IrrMpXoRUTiqDA3gy2fqEUvIpK0inIzqf5UiV5EJGkV5Wbwr5111NY3xG2ZSvQiInFUmJsBwNZPd8dtmUr0IiJxVNQvkui3fBK/E7JK9CIicVSYE/nRVDyvpVeiFxGJI79FH89r6ZXoRUTiaEB2H8zUohcRSVppqSkMyO6jFr2ISDIrzM2kOo6/jlWiFxGJs8LcjO7VdWNmd5jZFjNbHii738yWeH9rzWxJYNzlZlZpZu+a2QldFbiI
SE9VFOfbIKS1o86fgZuAuX6Bc+7r/rCZ/RbY7g3vD0wHDgCGAM+Z2RjnXPx+AiYi0s2V9M9ic80uPtvdQN8+qV2+vJgteufcy8BHYePMzIAzgXu9olOB+5xztc65NUAlMKmTYhURSQplRTk4B6vi9KDwve2jPwrY7Jx73/u/GNgQGF/llYmIiKesKPLs2J6S6M+iqTUPYCF1XNiEZjbTzCrMrKK6unovwxAR6TlGDMwixeL3kPA9TvRmlgZ8Fbg/UFwFlAT+HwpsDJveOTfHOVfunCsvLCzc0zBERHqcjLRUhg/I7v6JHjgeeMc5VxUoexyYbmYZZlYKjAYW7U2AIiLJaFRhTvdJ9GZ2LzAf2NfMqszsfG/UdJp32+CcWwE8ALwN/B2YpStuRERaKivKYe22HdQ3NHb5smJeXumcO6uV8hmtlF8LXLt3YYmIJLeyohzqGhzrPtrJqMKcLl2WfhkrIpIAo70rb+LRfaNELyKSAKOU6EVEkltORhqnThhCcX7fLl9We26BICIiXeDG6RPjshy16EVEkpwSvYhIklOiFxFJckr0IiJJToleRCTJKdGLiCQ5JXoRkSSnRC8ikuTMudDngsQ3CLNqYN0eTj4Q2Br1SivD7S3r6vGJWGZ3jEnL7F0xaZltT7MnhjvnYj/QwznXo/+AiujX1obbW9bV4xOxzO4Yk5bZu2LSMtuepiv/1HUjIpLklOhFRJJcMtzUbE4rr60Nt7esq8cnYpndMSYts3fFpGW2Pb5LdIuTsSIi0nXUdSMikuQS3nVjZl8BbgAGEfngSQcsoUGJiHQdBzR4w9E5+CpgHnAzMC5Qvgn4NnANkEokT/7OOXdbexaY8Ba9c+6vzrlS51xf51wGcCHwPrAI2AzsBP5IZOP8C/gYaARWArXAZ0SuQW0AtgM1QL1Xx1fvve7wFxsoaxZOyHh/PrsD4xu95fmvYWqj5gmwKzC9P9/g9MH5+/83RsUFkXUmqm6wPFg/OC9/nXaHxBac14ch6xFWHyL7w5/WH78tMD56OwL4z057t5X57qYlf/qdgeHgPnSBsuC6ttY3GV1eEyhvCJQ1eGU7vbLg/qrzXoPbyF+33TRdGx1cVnA7BMv9fbc1JLaHab5+EDn+34uaFpp7e9joAAAIPUlEQVR+jxLcH3WB8dHHUnCeweMyjAtM157n30XPq7VpKkPmH/b+aWxl2OfvK384TNj4xqhXaNpmnxB+DEW/bx3w+5Dxa0Km3UBkW+wErvTK/GP+HGC9t/xGIsfgd7xx1wCTnXMTgMOAn5jZkJD5t5DwRB9kZmOAq4FcIq38AUSS/deItPJXAf284Ztp/oZOJXLgZ3n/+wd/I02fmv4Os0BZ8E3g2x0Y7++w9MCyjMi2M1p/Y2R4r8FklBmIwy9vCJRZVJk/fwv8D/BWYJ5+WR3gP5MsLKnuamX+PkfTtsgNlPvHSHA7Bdepj/e6OzC/foF5pnrDwf2R4w3nBeIIzjOdlvw3fkogpuDxWx949fdvHeHfDv06wQ8kf3/VBWLe7Q3/KzC+MjCNv/yPAmWrvddgSy2YFKoCw35suwPzfzMk5iGBMn9/ZgHDQ5ZVHJi3P01bjRqIrIefrKL3d/BYagjMc01UeZjo/FITGA4e9/mBYaP59ghui9WBYb88GF9w/qtC4gkeD6khZf46O5oaZcF1Dm4zf/rg8oPr69ddHVJWR6TRWQ2cEVieI7JfhxHZj7VEPhQ+Ap4ABgMF3jwy6Ej+7uoL9dv7R+TNvZjIDvomsAzYSCRBbPZW+hNvI+0Epnvj/LI6Ii36euADmlrgwb+GkLJYf61NU+/tgI7Oz3mxNXagfnQMtXu43I4sMziN/80lbHxdO5fZnm3fGPUaK/aw7eDX8/d/fTuW29ayattYx1jbM2x8rO0QNr6t5USPi7U/4nHM7M08/Lr+t/LWpg17f9cFpo0e19r86kKWHTaPjm6H9hwbwfzRQORYWwH8ksgHwW4iue4x4BdEejpO
JpIbdwKzeuIPpq4h0jp8FRhPpHWeQeTT9B2aWnK7ad5CyaCphZZCZGNl0LzLwm8NhrVsYlnjvQa7bvCW1S+qLEzYV/U0mrcSoucR/S3B309+q3gX4aK7PILzDX6tjRVz0KfE/krvz9Ofb3Db+zEFW2DR3Sr+tNHfXIKC8Ud3e7lAmT9temBc8LW1eQZbsMHlpBBpvfnzD3bTBLtMfAsDwzVR4+pp/Rj05xvWOg7G7s8zuL7B8WHHRnAbLIkaF71fo7dXcF+01nKPFXNr36rCYohuqVsr9cK69/xvNik0/5blTxs8xnypgeEGwr/tQuvvmeA33bBuveAx5vuAyPpnEGmoNhBp3fvH7Fe8uGqJ5L59AP/hsh865w4EyoBvmdk+rcTVTLdI9GZ2DPDvRLoe7gNOB/5GUzfNfCIrnkJTnzDe/2lE3nD+G3KnN13wK2hNoL4v+IaNFjyghnuv9TTvA3fe8lo7cezv6OAywxJZ2MEX/D/4hviN97qllViDB230fFKI9P2FLd8Xtk2yvPn6XSHR9fz1C3YVhJUFu41Sosqik1VYIm8I1PPPIfhdVcHlBO2kaZu0tp/88rXea10gvnqa9vF2ryyYyP34gh9eWYHx/aLqbqf1BOd3gUWfZ/GH/f83h0wblBVSFtzOB0SNi05C/v9hyTv6+PKFfeAFzwOFnSMIzivsPE7Y+yLYHbOd1hnQP6osnfB8F1zGbloem34s7cmVwQ8Xf/22hsxrIJH3UyqRbtJUoMirNxYo8ZaXTeSE7H7Awd7/GwGccxuJtP6PakdciU/0ZtYfuNOL5WLgFmAW8F9EVnwjcL5XfQVNB0o2kZ23i6YPhBVEdvBmb5xf119P/xO/lqY3Vhi/zxLgUe/1HZreRDsDdT7xyqL7MtMI/ySH5q2R4D7w31zBk1Z+zPU09WWWBsaHnScIax1+RqTvLyg66UT3jQeT6oeB8cF6fvxhiacuZJ7B9fU/tD+LKg/7IPD7bBto6tv3Y3OEnyztQ+ut0OhYRnmvwQTkXwHmaEochwbG++cydgXm9a+QOGq9+eQGxgeXH0xq/rfE4PTBxo2/74MftnWtDPv84zCYyMIS7meBOMK+CQWPy2CizaCl4Ldq/xgInicKfqvz34vB/RHWYi8LTDM4JL6akDJf9IdR9DcjR6R7xB/244xO1P54fzsHt1/0hwvA/VHTGZF9559o/aE3zs8Na4l8ELzn1fsUWErkmN+Kd/x4eXMKTRc0tCnhP5gys8uJ9En5X4/9BKlLLEWkt/mEyOWVU4hcjOLnwQ+AXxG5KtHPjzc559r3q9pEn4Rt4+TsqUT66x/whh8IKXsVeCBkurC6DwDXAhui6r5NpHWywa8XNZ+waWLG5E/nvVaHTPNAyDxbTNPK+NbWPTj+gZDt2Nb4V71tsaGN/bGeSMu5xTw7uG1abIfAvvDXf0PYNCHr22I7RdV5wIvZf43e9sFtHroeIdu2rXVvFlMr+3h79HZuZd+0WtbGe6XFeobEXB08fgg5noJxtrJdo+cZ8z3Uxv5u1zRh+yvkGIler+jjrdkyY00Tss7R07d7fyY0nyY6gFaSytVEvpa/S+Q+EEu912CZPzwxarqwukuJXNHzGTA9UNc/0bibyLXKS4mc9Li6jWmWEjsmf7qVRL7irYuaZqkfd8iygtOExfJwK+v+UmB8cD2iYwobv5VIctjlr2vI/vCvFHouZJ7TO7BtWmyHwL7Y5a1/9Ho222aB9a0j8lU3uJ2CddZ6Mfu/vXg+atv728xfZth6TIzatm2t++Lgvgs5LhcT6ZLZHdzOrey7tsqmt/Je2RjYR60dA3VEvv6/GzI+uF38OD9ffivLXBxWl/DjNmx/t2uaVvZX9DHy+XqE7Mc5IcuMNU30OkdP3+79meicmvCuGxER6VoJPxkrIiJdS4leRCTJKdGLiCQ5JXoRkSSnRC8ikuSU6EVEktz/AyLnPfJc63d9AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "vc=train_data.iloc[:,1].value_counts()\n",
    "plt.figure()\n",
    "plt.plot(vc)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    190.000000\n",
       "mean     201.163158\n",
       "std       11.299188\n",
       "min      168.000000\n",
       "25%      194.000000\n",
       "50%      202.000000\n",
       "75%      209.000000\n",
       "max      227.000000\n",
       "Name: 1, dtype: float64"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_data.iloc[:,1].value_counts().describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>a6394b0f513290f4651cc46792e5ac86.jpeg</td>\n",
       "      <td>ZJL1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2fb89ef2ace869d3eb3bdd3afe184e1c.jpeg</td>\n",
       "      <td>ZJL1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>eda9f3bef2bd8da038f6acbc8355fc25.jpeg</td>\n",
       "      <td>ZJL1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>7d93ef45972154aae150b4f9980a79c0.jpeg</td>\n",
       "      <td>ZJL1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>fb901b4f9a8e396c1d0155bccc5e5671.jpeg</td>\n",
       "      <td>ZJL1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                       0     1\n",
       "0  a6394b0f513290f4651cc46792e5ac86.jpeg  ZJL1\n",
       "1  2fb89ef2ace869d3eb3bdd3afe184e1c.jpeg  ZJL1\n",
       "2  eda9f3bef2bd8da038f6acbc8355fc25.jpeg  ZJL1\n",
       "3  7d93ef45972154aae150b4f9980a79c0.jpeg  ZJL1\n",
       "4  fb901b4f9a8e396c1d0155bccc5e5671.jpeg  ZJL1"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# label_list分析"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_labels=pd.read_csv(dir+\"label_list.txt\",sep='\\t',header=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>ZJL1</td>\n",
       "      <td>goldfish</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ZJL10</td>\n",
       "      <td>tarantula</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ZJL100</td>\n",
       "      <td>drumstick</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>ZJL101</td>\n",
       "      <td>dumbbell</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>ZJL102</td>\n",
       "      <td>flagpole</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        0          1\n",
       "0    ZJL1   goldfish\n",
       "1   ZJL10  tarantula\n",
       "2  ZJL100  drumstick\n",
       "3  ZJL101   dumbbell\n",
       "4  ZJL102   flagpole"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_labels.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(230, 2)"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_labels.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# attributes_per_class分析"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_attr=pd.read_csv(dir+\"attributes_per_class.txt\",sep='\\t',header=None,index_col=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>10</th>\n",
       "      <th>...</th>\n",
       "      <th>21</th>\n",
       "      <th>22</th>\n",
       "      <th>23</th>\n",
       "      <th>24</th>\n",
       "      <th>25</th>\n",
       "      <th>26</th>\n",
       "      <th>27</th>\n",
       "      <th>28</th>\n",
       "      <th>29</th>\n",
       "      <th>30</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ZJL1</th>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.5</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ZJL10</th>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.7</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ZJL100</th>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ZJL101</th>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.8</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.3</td>\n",
       "      <td>0.1</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ZJL102</th>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 30 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        1    2   3   4   5   6    7    8    9    10 ...   21   22   23   24  \\\n",
       "0                                                   ...                       \n",
       "ZJL1     1  0.0   0   0   0   0  0.5  1.0  0.0  0.0 ...  0.0  0.0  0.0  0.0   \n",
       "ZJL10    1  0.0   0   0   0   0  0.5  0.0  0.0  0.7 ...  0.0  0.0  0.0  0.0   \n",
       "ZJL100   0  0.0   0   0   0   0  0.0  0.0  0.0  1.0 ...  0.0  0.0  0.0  1.0   \n",
       "ZJL101   0  0.0   0   0   0   0  0.8  0.1  0.3  0.1 ...  0.0  1.0  0.0  1.0   \n",
       "ZJL102   0  0.0   0   0   0   0  0.0  0.1  0.0  0.0 ...  0.0  0.0  0.5  0.0   \n",
       "\n",
       "         25  26  27   28  29  30  \n",
       "0                                 \n",
       "ZJL1    0.0   0   0  0.0   0   0  \n",
       "ZJL10   0.0   0   0  0.0   0   0  \n",
       "ZJL100  0.0   0   0  0.0   0   0  \n",
       "ZJL101  0.0   0   0  0.0   0   0  \n",
       "ZJL102  0.0   0   0  0.0   0   0  \n",
       "\n",
       "[5 rows x 30 columns]"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_attr.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(230, 30)"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_attr.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# class_wordembeddings.txt说明"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_words=pd.read_csv(dir+\"class_wordembeddings.txt\",sep=' ',header=None,index_col=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>10</th>\n",
       "      <th>...</th>\n",
       "      <th>291</th>\n",
       "      <th>292</th>\n",
       "      <th>293</th>\n",
       "      <th>294</th>\n",
       "      <th>295</th>\n",
       "      <th>296</th>\n",
       "      <th>297</th>\n",
       "      <th>298</th>\n",
       "      <th>299</th>\n",
       "      <th>300</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>book</th>\n",
       "      <td>0.048733</td>\n",
       "      <td>-0.055083</td>\n",
       "      <td>0.14947</td>\n",
       "      <td>-0.112690</td>\n",
       "      <td>0.098791</td>\n",
       "      <td>0.54334</td>\n",
       "      <td>-0.512040</td>\n",
       "      <td>0.278820</td>\n",
       "      <td>0.11497</td>\n",
       "      <td>-1.33970</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.11647</td>\n",
       "      <td>-0.072080</td>\n",
       "      <td>-0.418210</td>\n",
       "      <td>0.39238</td>\n",
       "      <td>-0.01703</td>\n",
       "      <td>-0.031026</td>\n",
       "      <td>0.254280</td>\n",
       "      <td>0.51352</td>\n",
       "      <td>0.136670</td>\n",
       "      <td>-0.12639</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>car</th>\n",
       "      <td>0.464430</td>\n",
       "      <td>0.377300</td>\n",
       "      <td>-0.21459</td>\n",
       "      <td>-0.507680</td>\n",
       "      <td>-0.245760</td>\n",
       "      <td>0.08134</td>\n",
       "      <td>0.101450</td>\n",
       "      <td>0.251550</td>\n",
       "      <td>-0.36152</td>\n",
       "      <td>-1.60300</td>\n",
       "      <td>...</td>\n",
       "      <td>0.19966</td>\n",
       "      <td>-0.409790</td>\n",
       "      <td>-0.103250</td>\n",
       "      <td>-0.04361</td>\n",
       "      <td>0.17564</td>\n",
       "      <td>0.657090</td>\n",
       "      <td>-0.099860</td>\n",
       "      <td>0.49107</td>\n",
       "      <td>0.282150</td>\n",
       "      <td>0.34554</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>player</th>\n",
       "      <td>-0.296690</td>\n",
       "      <td>0.997620</td>\n",
       "      <td>0.20873</td>\n",
       "      <td>-0.135730</td>\n",
       "      <td>0.120000</td>\n",
       "      <td>0.22275</td>\n",
       "      <td>-0.459710</td>\n",
       "      <td>-0.154150</td>\n",
       "      <td>-0.25379</td>\n",
       "      <td>-0.59144</td>\n",
       "      <td>...</td>\n",
       "      <td>0.51049</td>\n",
       "      <td>0.288560</td>\n",
       "      <td>0.075313</td>\n",
       "      <td>0.31128</td>\n",
       "      <td>0.41311</td>\n",
       "      <td>0.417960</td>\n",
       "      <td>0.292250</td>\n",
       "      <td>-0.53699</td>\n",
       "      <td>-0.090443</td>\n",
       "      <td>-0.45571</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ball</th>\n",
       "      <td>-0.226940</td>\n",
       "      <td>0.473360</td>\n",
       "      <td>-0.11235</td>\n",
       "      <td>0.292250</td>\n",
       "      <td>0.236600</td>\n",
       "      <td>-0.14951</td>\n",
       "      <td>-0.030909</td>\n",
       "      <td>-0.224720</td>\n",
       "      <td>-0.31172</td>\n",
       "      <td>-1.05470</td>\n",
       "      <td>...</td>\n",
       "      <td>0.63139</td>\n",
       "      <td>-0.053838</td>\n",
       "      <td>0.629640</td>\n",
       "      <td>0.36903</td>\n",
       "      <td>0.40500</td>\n",
       "      <td>-0.256910</td>\n",
       "      <td>0.427080</td>\n",
       "      <td>-0.59365</td>\n",
       "      <td>0.276870</td>\n",
       "      <td>-0.52739</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>web</th>\n",
       "      <td>-0.748700</td>\n",
       "      <td>0.220660</td>\n",
       "      <td>0.21329</td>\n",
       "      <td>-0.084496</td>\n",
       "      <td>0.334800</td>\n",
       "      <td>0.39182</td>\n",
       "      <td>-0.183360</td>\n",
       "      <td>-0.079219</td>\n",
       "      <td>0.16892</td>\n",
       "      <td>-1.38040</td>\n",
       "      <td>...</td>\n",
       "      <td>0.27070</td>\n",
       "      <td>-0.077514</td>\n",
       "      <td>-0.860600</td>\n",
       "      <td>0.29805</td>\n",
       "      <td>-0.15174</td>\n",
       "      <td>-0.161850</td>\n",
       "      <td>0.052965</td>\n",
       "      <td>0.44862</td>\n",
       "      <td>-0.469340</td>\n",
       "      <td>-0.31611</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 300 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             1         2        3         4         5        6         7    \\\n",
       "0                                                                            \n",
       "book    0.048733 -0.055083  0.14947 -0.112690  0.098791  0.54334 -0.512040   \n",
       "car     0.464430  0.377300 -0.21459 -0.507680 -0.245760  0.08134  0.101450   \n",
       "player -0.296690  0.997620  0.20873 -0.135730  0.120000  0.22275 -0.459710   \n",
       "ball   -0.226940  0.473360 -0.11235  0.292250  0.236600 -0.14951 -0.030909   \n",
       "web    -0.748700  0.220660  0.21329 -0.084496  0.334800  0.39182 -0.183360   \n",
       "\n",
       "             8        9        10    ...         291       292       293  \\\n",
       "0                                    ...                                   \n",
       "book    0.278820  0.11497 -1.33970   ...    -0.11647 -0.072080 -0.418210   \n",
       "car     0.251550 -0.36152 -1.60300   ...     0.19966 -0.409790 -0.103250   \n",
       "player -0.154150 -0.25379 -0.59144   ...     0.51049  0.288560  0.075313   \n",
       "ball   -0.224720 -0.31172 -1.05470   ...     0.63139 -0.053838  0.629640   \n",
       "web    -0.079219  0.16892 -1.38040   ...     0.27070 -0.077514 -0.860600   \n",
       "\n",
       "            294      295       296       297      298       299      300  \n",
       "0                                                                         \n",
       "book    0.39238 -0.01703 -0.031026  0.254280  0.51352  0.136670 -0.12639  \n",
       "car    -0.04361  0.17564  0.657090 -0.099860  0.49107  0.282150  0.34554  \n",
       "player  0.31128  0.41311  0.417960  0.292250 -0.53699 -0.090443 -0.45571  \n",
       "ball    0.36903  0.40500 -0.256910  0.427080 -0.59365  0.276870 -0.52739  \n",
       "web     0.29805 -0.15174 -0.161850  0.052965  0.44862 -0.469340 -0.31611  \n",
       "\n",
       "[5 rows x 300 columns]"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_words.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(230, 300)"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_words.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# attribute_list.txt说明"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_attrlabels=pd.read_csv(dir+\"attribute_list.txt\",sep='\\t',header=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>is animal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>is transportation</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>is clothes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>is plant</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>is tableware</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   0                  1\n",
       "0  1          is animal\n",
       "1  2  is transportation\n",
       "2  3         is clothes\n",
       "3  4           is plant\n",
       "4  5       is tableware"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_attrlabels.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(30, 2)"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_attrlabels.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# submit.txt拆分验证集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "valid=pd.read_csv(dir+\"submit.txt\",sep='\\t',header=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>9f1d3113f1fcb573596ca99ecb712364.jpeg</td>\n",
       "      <td>ZJL178</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>9f73904f7a72fa7285b80f2ae8286066.jpeg</td>\n",
       "      <td>ZJL178</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>619bf8d90e1fa19a7f2966bd38b27ccd.jpeg</td>\n",
       "      <td>ZJL178</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>6773ca5a1a615fc0d67f836e0772ff46.jpeg</td>\n",
       "      <td>ZJL178</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>e1badc8feb1e4d4a6e44eb382d13bc24.jpeg</td>\n",
       "      <td>ZJL178</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                       0       1\n",
       "0  9f1d3113f1fcb573596ca99ecb712364.jpeg  ZJL178\n",
       "1  9f73904f7a72fa7285b80f2ae8286066.jpeg  ZJL178\n",
       "2  619bf8d90e1fa19a7f2966bd38b27ccd.jpeg  ZJL178\n",
       "3  6773ca5a1a615fc0d67f836e0772ff46.jpeg  ZJL178\n",
       "4  e1badc8feb1e4d4a6e44eb382d13bc24.jpeg  ZJL178"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "valid.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(8291, 2)"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "valid.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
